# packages
library(ggplot2)
library(here)
## here() starts at C:/Users/Jonathan/Desktop/unimelb/PhD/missNetsSimulations
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tibble)
# true estimates
resultsFiles = list.files(path = here("Output", "20230726_missNetsTrueModels"))
# initialise list
trueCoefList = list()
trueSeList = list()
# index for how missingness is saved
missSaveLabel = c("Peter", "Todd")
missSaveValue = c("NA", "0")
# TODO:: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1
# TODO:: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 1
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# loop to load model parameters
for(netInd in 1:6){
# load in results for each network
load(here("Output", "20230726_missNetsTrueModels", resultsFiles[[netInd]]))
# and take the coefficients only
trueCoefList[[netInd]] = coef(modelres)
trueSeList[[netInd]] = summary(modelres)$coefficients[,2]
}
## Warning: This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## Warning: This object was fit with 'ergm' version 4.2.2 or earlier. Summarizing
## it with version 4.3 or later may return incorrect results or fail.
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", chosenMissLabel))
# set up some lists to put in the ergm re-estimations
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# hmm... what's the best way to handle this....
# I have specific indices,
# but there are different amounts of each file because re-estimations tend to fail
# especially for some combinations
# some regular expressions
# net1Ind = grep("net1", outputList)
# missMod2Ind = grep("missModel2", outputList)
#
# # check which indices overlap
# net1MissMod2 = missMod2Ind[missMod2Ind %in% net1Ind]
# set a specific index for the network
chosenNetInd = 2
# file list for the chosen network index
chosenNetOutFiles = grep(paste("20230814_missNetReest_net", chosenNetInd, sep =""), outputList)
##### For the MNAR files (coefSet1), the Miss Model is always 2... so what I can do is:
## regex the MNAR files
chosenMnarOutFiles = grep(paste("20231011_missErgmNetReest_Miss", chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", "Peter", chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propMiss is fine (or should be.)
# regex to get the first number after the specified text
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# plotting
# note: I think I should have a plot for each missingness proportion
chosenProp = 1
# there should only be 3 miss props, 1 for 10%, 3.5 for 35%, and 6 for 60%
# subset the lists to only be a single chosen proportion
chosenPropIndex = propMissList == chosenProp
modelResChosenPropList = modelResList[chosenPropIndex]
modelSeChosenPropList = modelSeList[chosenPropIndex]
missModelChosenPropList = missModelList[chosenPropIndex]
## AltStar
# making the dataset
altStarPlotData = data.frame(altStar = c(unlist(lapply(modelResChosenPropList, `[[`, 2) )),
SE = c(unlist(lapply(modelSeChosenPropList, `[[`, 2))),
missModel = factor(c(unlist(missModelChosenPropList)), levels = c(1,2,3,4), labels = c("Indep", "mcarERGM", "Latent", "mnarErgm")))
# order the plot data
altStarPlotOrd = altStarPlotData[order(altStarPlotData[,"altStar"]),]
# one more ordering with the missing model type
altStarPlotOrd = altStarPlotOrd[order(altStarPlotOrd[,"missModel"]),]
# getting the true altStar value
trueAltStar = trueCoefList[[chosenNetInd]][2]
trueAltStarSE = trueSeList[[chosenNetInd]][2]
# caterpillar?
altStarPlot = ggplot( data = altStarPlotOrd,
aes( x = 1:nrow(altStarPlotOrd), y = altStar, col = missModel)) +
geom_errorbar(aes (ymin = (altStar - 1.96*SE), ymax = (altStar + 1.96*SE), width = 0.4)) +
xlab("") +
ylab("AltStar estimate (95% Confidence Int)") +
geom_hline(yintercept = trueAltStar, col = "darkblue") +
geom_hline(yintercept = 0, col = "black", lty = 2) +
geom_point() +
ggtitle(paste("Diagnostics - Net", chosenNetInd, " - Prop", chosenProp, sep = "")) +
theme_classic() +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank()) +
# legend.position = "none") +
geom_rect(aes(xmin = -Inf,
xmax = Inf,
ymin = (trueAltStar - 1.96*trueAltStarSE),
ymax = (trueAltStar + 1.96*trueAltStarSE)),
alpha = 0.01,
colour = NA,
fill = "grey") +
ylim(min(altStarPlotData$altStar) - 2*max(abs(altStarPlotData$SE)), max(altStarPlotData$altStar) + 2*max(abs(altStarPlotData$SE)))
altStarPlot
## Gwesp
# making the dataset
gwespPlotData = data.frame(gwesp = c(unlist(lapply(modelResChosenPropList, `[[`, 3) )),
SE = c(unlist(lapply(modelSeChosenPropList, `[[`, 3))),
missModel = factor(c(unlist(missModelChosenPropList)), levels = c(1,2,3,4), labels = c("Indep", "mcarERGM", "Latent", "mnarErgm")))
# order the plot data
gwespPlotOrd = gwespPlotData[order(gwespPlotData[,"gwesp"]),]
# one more ordering with the missing model type
gwespPlotOrd = gwespPlotOrd[order(gwespPlotOrd[,"missModel"]),]
# getting the true gwdeg value
truegwesp = trueCoefList[[chosenNetInd]][3]
truegwespSE = trueSeList[[chosenNetInd]][3]
# caterpillar?
gwespPlot = ggplot( data = gwespPlotOrd,
aes( x = 1:nrow(gwespPlotOrd), y = gwesp, col = missModel)) +
geom_errorbar(aes (ymin = (gwesp - 1.96*SE), ymax = (gwesp + 1.96*SE), width = 0.4)) +
xlab("") +
ylab("GWESP estimate (95% Confidence Int)") +
labs(col = "missType") +
geom_hline(yintercept = truegwesp, col = "darkblue") +
geom_hline(yintercept = 0, col = "black", lty = 2) +
geom_point() +
ggtitle(paste("Diagnostics - Net", chosenNetInd, " - Prop", chosenProp, sep = "")) +
theme_classic() +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank()) +
# legend.position = "none") +
geom_rect(aes(xmin = -Inf,
xmax = Inf,
ymin = (truegwesp - 1.96*truegwespSE),
ymax = (truegwesp + 1.96*truegwespSE)),
alpha = 0.01,
colour = NA,
fill = "grey") +
ylim(min(gwespPlotOrd$gwesp) - 2*max(abs(gwespPlotOrd$SE)), max(gwespPlotOrd$gwesp) + 2*max(abs(gwespPlotOrd$SE)))
gwespPlot
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
# code to try computing a fail rate dataset
# fixed value for the amount of estimation attempts
estAttempts = 50
# choose a proportion
chosenProp = 1
# annoyingly how the missingness is saved is missing in this set of results so...
# there should only be one way the missingness is saved, keep it as a string
chosenMiss = chosenMissValue
# subset the lists to only be a single chosen proportion
# get the index
chosenPropIndex = propMissList == chosenProp
# and subset
propMissChosenPropList = propMissList[chosenPropIndex]
missModelChosenPropList = missModelList[chosenPropIndex]
# these lists will always be used so...
# there should only be one missing proportion
# note that there can sometimes be more than one missingness proportion in the list...
# turn the list of missingness proportions into a single numeric value
failMissProp = as.numeric(names(table(unlist(propMissChosenPropList))))
# check to make sure there's only one proportion
if(length(failMissProp) != 1){
stop("More than one missingness proportion after subsetting")
}
# calculate the success rate
successRate = as.numeric(table(unlist(missModelChosenPropList))/estAttempts)
# and which miss model it is, these can technically range from 1 to 4
failMissModel = as.numeric(names(table(unlist(missModelChosenPropList))))
# and put it all together
failData = data.frame(failRate = (1 - successRate),
netInd = rep(chosenNetInd, length(failMissModel)),
missModel = failMissModel,
missProp = rep(failMissProp, length(failMissModel)),
missSave = rep(chosenMiss, length(failMissModel)))
I want these functions to take the entire file list, choose specific networks and missingness proportions. The thing is, I might need a few helper functions beforehand to get the right data….
Multiple smaller functions doing very specific things are preferred over one giant function that does a lot of stuff all at once
### Function to turn the data from the extracted lists into plot-friendly format
# start from the list of coefficients and standard errors
# these can have varying rows per model because not all re-estimations converge
prepMissPlot = function(modelResList, modelSeList, missModelList, propMissList, chosenProp, chosenPara){
## prepMissPlot(modelResList, modelSeList, missModelList, propMissList, chosenProp, chosenPara) takes
## four different containing various estimated values and extracted indices to be used
## in making a plot-friendly dataset.
##
## Input:
## - modelResList: A k-long list containing the estimated model parameters for k re-estimated models.
##
## - modelSeList: A k-long list containing the estimated standard errors for k re-estimated models.
##
## - missModelList: A k-long list containing the chosen missingness model index for k re-estimated models.
##
## - propMissList: A k-long list containing the missingness proportion index for k re-estimated models.
##
## - chosenProp: The specified proportion of missingness to plot. Three possible values here:
## 1 = 10%, 3 = 35%, 6 = 60%.
##
## - chosenPara: The specified parameter to plot. Three possible values here:
## "edges" for edges, "altStar" for alternating stars, "gwesp" for gwesp.
##
## Output:
## - A k x 3 dataframe containing the parameter values, standard error values, and missing model index.
## The dataframe is ordered by parameter value (smallest to largest) for each missingness model.
## Check to make sure all the lists are of the same length
listLengths = lapply(list(modelResList, modelSeList, missModelList, propMissList), FUN = length)
## A unit test that spits out an error if there's more than one unique length in the list of lengths
if( length(unique(listLengths)) != 1 ){
stop("Lists have differing lengths")
}
# subset the lists to only be a single chosen proportion
chosenPropIndex = propMissList == chosenProp
modelResChosenPropList = modelResList[chosenPropIndex]
modelSeChosenPropList = modelSeList[chosenPropIndex]
missModelChosenPropList = missModelList[chosenPropIndex]
# Specify indices for the chosen parameter
# doing it this way because parameter names get very lengthy
paraIndex = c(1:3)
modelParaNames = c("edges", "altStar", "gwesp")
chosenParaIndex = paraIndex[modelParaNames == chosenPara]
# making the dataset
plotData = data.frame(parameter = c(unlist(lapply(modelResChosenPropList, `[[`, chosenParaIndex) )),
SE = c(unlist(lapply(modelSeChosenPropList, `[[`, chosenParaIndex))),
missModel = factor(c(unlist(missModelChosenPropList)), levels = c(1,2,3,4), labels = c("Indep", "mcarERGM", "Latent", "mnarErgm")))
# change to allow 4 = mnarErgm and change missModel 2 = mcarERGM
# order the plot data
plotOrd = plotData[order(plotData[,"parameter"]),]
# one more ordering with the missing model type
plotOrd = plotOrd[order(plotOrd[,"missModel"]),]
# return the ordered plot data
return(plotOrd)
}
## actual plotting function
missCaterpillarPlot = function(plotData, truePara, trueSe, chosenPara, chosenNetInd, chosenProp, chosenMiss){
## missCaterpillarPlot(plotData, truePara, trueSe, chosenPara, chosenNetInd, chosenProp) is a very specialised
## plotting function that produces a caterpillar plot to compare parameter estimates for very specific data.
##
## Input:
## - plotData: Needs to be a k x 3 data frame for k re-estimated models containing parameter values,
## standard error values, and a missing model index. Ordering is optional, but visually
## useful. Ideally from prepMissPlot().
##
## - truePara: A single float (can technically be integer, but unlikely). Represents the true model estimate.
##
## - trueSe: A single float (can technically be integer, but unlikely). Represents the true standard error.
##
## - chosenPara: A string with three (but really two) options. The chosen string is the parameter that is
## plotted. The options, in order, are "edges", "altStar", and "gwesp"
##
## - chosenNetInd: An integer between 1 to 6 for current purposes. Each integer represents one of the six
## datasets from UCINet chosen for the current round of re-estimations.
##
## - chosenProp: A single integer or float with three options representing the proportion of missingness.
## Options are 1 = 10%, 3.5 = 35%, 6 = 60%.
##
## - chosenMiss: A value to indicate what missingness is saved as (e.g., NA is Peter, 0 is Todd).
## Input should be strings that are directly used for the label.
##
## Output:
## - The output should be a caterpillar plot containing with k datapoints for k re-estimated models.
## Colours are labelled and represent the missingness model used. True values are represented as a
## grey rectangle. All error bars or other depictions of spread are 95% confidence intervals. Plot
## title should also be adaptive to the parameters in the function.
# requires ggplot2 to work
require(ggplot2)
# caterpillar?
caterPlot = ggplot( data = plotData,
aes( x = 1:nrow(plotData), y = parameter, col = missModel)) +
geom_errorbar(aes (ymin = (parameter - 1.96*SE), ymax = (parameter + 1.96*SE), width = 0.4)) +
xlab("") +
ylab(paste(chosenPara,"estimate (95% Confidence Int)", sep = " ")) +
labs(col = "missType") +
geom_hline(yintercept = truePara, col = "darkblue") +
geom_hline(yintercept = 0, col = "black", lty = 2) +
geom_point() +
ggtitle(paste("Diagnostics - Net", chosenNetInd, " - Prop", chosenProp, " - Miss", chosenMiss ,sep = "")) +
theme_classic() +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank()) +
geom_rect(aes(xmin = -Inf,
xmax = Inf,
ymin = (truePara - 1.96*trueSe),
ymax = (truePara + 1.96*trueSe)),
alpha = 0.01,
colour = NA,
fill = "grey") +
ylim(min(min(plotData$parameter) - 2*max(abs(plotData$SE)), (truePara - 1.96*trueSe)),
max(max(plotData$parameter) + 2*max(abs(plotData$SE)), (truePara + 1.96*trueSe)))
# print out the plot
caterPlot
}
## function for calculating failure rate (for each datafile read in)
# a function for this because I overwrite which networks I read in for the caterpillar plots
# this function captures every combination of which missingness model, which chosen missingness, which missingness proportion, and which network.
prepFailPlot = function(propMissList, missModelList, chosenNetInd, chosenProp, chosenMiss, estAttempts = 50){
## prepFailPlot(propMissList, missModelList, chosenNetInd, chosenProp, chosenMiss, estAttempts = 50) calculates
## the failure rates of re-estimations of degraded networks with specific object type requirements.
##
## Input:
## - missModelList: A k-long list containing the chosen missingness model index for k re-estimated models.
## Can technically range between 1 to 4.
##
## - propMissList: A k-long list containing the missingness proportion index for k re-estimated models.
##
## - chosenNetInd: An integer between 1 to 6 for current purposes. Each integer represents one of the six
## datasets from UCINet chosen for the current round of re-estimations.
##
## - chosenProp: A single integer or float with three options representing the proportion of missingness.
## Options are 1 = 10%, 3 = 35% (... yes, it's annoying.), 6 = 60%.
##
## - chosenMiss: A string to indicate what missingness is saved as (e.g., "NA" is Peter, "0" is Todd).
##
## Output:
## - The output should be a data frame containing the failure rate for the specified combination of network,
## proportion of missingness, how the missingness is saved, and all the missingness models in the input list.
# subset the lists to only be a single chosen proportion
# get the index
chosenPropIndex = propMissList == chosenProp
# and subset
propMissChosenPropList = propMissList[chosenPropIndex]
missModelChosenPropList = missModelList[chosenPropIndex]
# turn the list of missingness proportions into a single numeric value
failMissProp = as.numeric(names(table(unlist(propMissChosenPropList))))
# check to make sure there's only one proportion
if(length(failMissProp) != 1){
stop("More than one missingness proportion after subsetting")
}
# calculate the success rate
successRate = as.numeric(table(unlist(missModelChosenPropList))/estAttempts)
# and which miss model it is, these can technically range from 1 to 4
failMissModel = as.numeric(names(table(unlist(missModelChosenPropList))))
# and put it all together
failData = data.frame(failRate = (1 - successRate),
netInd = rep(chosenNetInd, length(failMissModel)),
missModel = failMissModel,
missProp = rep(failMissProp, length(failMissModel)),
missSave = rep(chosenMiss, length(failMissModel)))
# return the data frame
return(failData)
}
## no need for a function to plot fail rates for each combination since I would only want one plot
# choose a network
chosenNetInd = 2
# generate relative bias and relative variance (...standard error?) data structures
relBiasData = data.frame(rBias = c(), netInd = c(), missSave = c(), propMiss = c(), missModel = c(), paraName = c())
relSeData = data.frame(rSe = c(), netInd = c(), missSave = c(), propMiss = c(), missModel = c(), paraName = c())
# select a parameter
modelParaOptions = c("edges", "altStar", "gwesp")
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
# make a temp rbias/rSe data structure to bind with the data frame
tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
# bind them
relBiasData = rbind(relBiasData, tempRelBias)
relSeData = rbind(relSeData, tempRelSe)
}
## parameter SE missModel
## 8 -3.572767 1.424147 Indep
## 11 -1.985151 2.002649 Indep
## 16 -1.838034 2.499750 Indep
## 4 -1.770333 2.842933 Indep
## 5 -1.659360 2.455153 Indep
## 17 -1.632414 2.516041 Indep
## parameter SE missModel
## 7 -2.667740 1.1581335 Indep
## 6 -2.651687 1.1342895 Indep
## 10 -2.519181 0.8830086 Indep
## 13 -2.484591 0.9431191 Indep
## 14 -2.391916 0.9567384 Indep
## 18 -2.339140 0.8401319 Indep
## parameter SE missModel
## 15 3.713921 0.4693278 Indep
## 3 3.805494 0.5092882 Indep
## 18 3.847856 0.5059986 Indep
## 12 3.867887 0.5433778 Indep
## 17 3.937885 0.5014629 Indep
## 11 3.945033 0.5001198 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
# test run for the fail rate data
# a data frame to initialise
failRateData = data.frame()
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 1
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(4, 5, 6)
# TODO: vector of file names for regex
chunkRdataNames = c("20230814_missNetReest_net",
"20230814_missNetReest_net",
"20231010_missNetReest_net")
chunkMnarNames = c("20231011_missErgmNetReest_Miss",
"20231011_missErgmNetReest_Miss",
"20231011_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all chosen networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
# make a temp rbias/rSe data structure to bind with the data frame
tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
# bind them
relBiasData = rbind(relBiasData, tempRelBias)
relSeData = rbind(relSeData, tempRelSe)
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 27 -2.0450471 1.355888 Indep
## 17 -1.6887316 1.372465 Indep
## 46 -1.0782637 1.986377 Indep
## 14 -1.0079846 1.794369 Indep
## 30 -0.9901474 1.690993 Indep
## 23 -0.9677308 1.709223 Indep
## parameter SE missModel
## 29 -2.380226 0.8948844 Indep
## 34 -2.332890 0.8567828 Indep
## 39 -2.225284 0.6481276 Indep
## 12 -2.203632 0.7067738 Indep
## 25 -2.197335 0.7339564 Indep
## 19 -2.194530 0.6131555 Indep
## parameter SE missModel
## 42 2.795684 0.4150812 Indep
## 35 2.841932 0.3840148 Indep
## 5 2.906557 0.3822707 Indep
## 33 2.937962 0.3854581 Indep
## 47 2.939097 0.4309207 Indep
## 11 2.950955 0.3905500 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 13 -5.050360 NA Indep
## 9 -4.101907 0.4138049 Indep
## 12 -4.095339 0.3877220 Indep
## 5 -4.089140 0.3838097 Indep
## 2 -4.052289 0.4108890 Indep
## 14 -4.042779 0.4425365 Indep
## parameter SE missModel
## 14 -0.4271309 0.2344825 Indep
## 1 -0.3689833 0.2283937 Indep
## 9 -0.3686057 0.2258536 Indep
## 2 -0.3471691 0.2368848 Indep
## 17 -0.3043420 0.2638864 Indep
## 16 -0.2999977 0.2291898 Indep
## parameter SE missModel
## 13 0.5529485 NA Indep
## 15 1.3536200 0.3463476 Indep
## 4 1.4155391 0.3970444 Indep
## 10 1.5321071 0.3904505 Indep
## 3 1.5354860 0.3464884 Indep
## 11 1.5899620 0.3939409 Indep
## parameter SE missModel
## 33 -6.661065 1.161756 Indep
## 43 -6.520892 1.108532 Indep
## 9 -6.430122 1.040719 Indep
## 42 -6.423533 1.099057 Indep
## 40 -6.348662 1.106593 Indep
## 10 -6.331207 1.156858 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 28 -0.9007415 0.2363367 Indep
## 45 -0.7825025 0.2400551 Indep
## 44 -0.7622703 0.2476933 Indep
## 18 -0.7536412 0.2325391 Indep
## 3 -0.7459583 0.2495798 Indep
## 16 -0.7435994 0.2477373 Indep
## parameter SE missModel
## 10 1.053478 0.1945004 Indep
## 29 1.204949 0.1979167 Indep
## 19 1.224828 0.2129165 Indep
## 42 1.239121 0.2127094 Indep
## 33 1.246916 0.2216857 Indep
## 9 1.261516 0.2118016 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5 # specifically for 35% missingness because I regex by taking the first number next to 'prop'.
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(2, 4, 5, 6)
# TODO: vector of file names for regex
chunkRdataNames = c("20231004_missNetReest_net",
"20231004_missNetReest_net",
"20231004_missNetReest_net",
"20231010_missNetReest_net")
chunkMnarNames = c("20231011_missErgmNetReest_Miss",
"20231011_missErgmNetReest_Miss",
"20231011_missErgmNetReest_Miss",
"20231011_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissPropFloat,
chosenMiss = chosenMissValue) %>%
print()
# make a temp rbias/rSe data structure to bind with the data frame
tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
# bind them
relBiasData = rbind(relBiasData, tempRelBias)
relSeData = rbind(relSeData, tempRelSe)
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 7 -3.3781085 1.571063 Indep
## 6 -2.8192660 1.761062 Indep
## 5 -1.2349580 3.762309 Indep
## 10 -1.0923401 4.398689 Indep
## 2 -0.9781865 3.853383 Indep
## 8 -0.5903796 2.911597 Indep
## parameter SE missModel
## 12 -4.558152 4.043885 Indep
## 14 -3.474548 2.506201 Indep
## 13 -3.227319 2.943347 Indep
## 1 -3.187661 2.242548 Indep
## 16 -2.702853 1.253112 Indep
## 15 -2.632289 1.020986 Indep
## parameter SE missModel
## 4 3.323302 0.5682986 Indep
## 1 3.437627 0.6409324 Indep
## 14 3.484401 0.6621262 Indep
## 8 3.566714 0.6259340 Indep
## 3 3.696938 0.6449210 Indep
## 11 3.741881 0.6132945 Indep
## parameter SE missModel
## 16 -2.540926 1.468209 Indep
## 33 -2.529681 1.691315 Indep
## 35 -2.427015 1.466072 Indep
## 29 -2.020486 1.593190 Indep
## 26 -1.943041 1.608631 Indep
## 1 -1.765409 1.619538 Indep
## parameter SE missModel
## 12 -3.536047 1.825336 Indep
## 23 -3.329675 2.333945 Indep
## 18 -2.818693 1.409608 Indep
## 37 -2.728979 1.230028 Indep
## 34 -2.644825 1.234967 Indep
## 10 -2.596926 1.057250 Indep
## parameter SE missModel
## 3 2.497359 0.4892193 Indep
## 7 2.510058 0.4429165 Indep
## 30 2.617554 0.5050321 Indep
## 2 2.640901 0.5463155 Indep
## 27 2.712829 0.4962674 Indep
## 13 2.731805 0.4496286 Indep
## parameter SE missModel
## 6 -4.763305 0.5927591 Indep
## 12 -4.423675 0.5544808 Indep
## 1 -4.256021 0.5488157 Indep
## 15 -4.180141 0.5269770 Indep
## 2 -4.162467 0.5045574 Indep
## 5 -4.153906 0.5770239 Indep
## parameter SE missModel
## 6 -0.6946950 0.2194906 Indep
## 5 -0.6897158 0.2631167 Indep
## 15 -0.6294828 0.2489500 Indep
## 12 -0.5802322 0.2540797 Indep
## 1 -0.5784914 0.2739039 Indep
## 10 -0.5771192 0.2790234 Indep
## parameter SE missModel
## 4 1.137951 0.4996599 Indep
## 8 1.178245 0.4959795 Indep
## 14 1.329230 0.3960599 Indep
## 7 1.429522 0.4758393 Indep
## 13 1.560362 0.4546985 Indep
## 17 1.770557 0.5469616 Indep
## parameter SE missModel
## 9 -6.980429 1.139007 Indep
## 11 -6.766356 1.286395 Indep
## 45 -6.555106 1.242611 Indep
## 48 -6.540552 1.100341 Indep
## 25 -6.434793 1.285338 Indep
## 41 -6.358277 1.639629 Indep
## parameter SE missModel
## 14 -1.277323 0.3909004 Indep
## 23 -1.185686 0.2943182 Indep
## 32 -1.167117 0.2680503 Indep
## 31 -1.159366 0.3823150 Indep
## 16 -1.136381 0.3262691 Indep
## 7 -1.063438 0.2624494 Indep
## parameter SE missModel
## 41 0.8504767 0.2874547 Indep
## 36 0.9067248 0.2625241 Indep
## 50 1.1343697 0.3153175 Indep
## 18 1.2385265 0.2613634 Indep
## 22 1.2590632 0.3627478 Indep
## 27 1.2599829 0.3063573 Indep
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 6
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(2, 4, 5, 6)
# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net",
"20231005_missNetReest_net",
"20231011_missNetReest_net",
"20231011_missNetReest_net")
chunkMnarNames = c("20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
# make a temp rbias/rSe data structure to bind with the data frame
tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
# bind them
relBiasData = rbind(relBiasData, tempRelBias)
relSeData = rbind(relSeData, tempRelSe)
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 5 -5.789352 2.062231 Indep
## 20 -4.598751 1.850594 Indep
## 8 -3.903931 2.399643 Indep
## 12 -3.574117 2.122361 Indep
## 9 -2.868342 1.920461 Indep
## 21 -1.942612 1.960238 Indep
## parameter SE missModel
## 7 -12.189565 14.285280 Indep
## 10 -8.636441 7.301452 Indep
## 1 -7.742465 8.269611 Indep
## 3 -7.435589 7.489878 Indep
## 14 -5.732763 5.862388 Indep
## 2 -3.727859 4.052053 Indep
## parameter SE missModel
## 4 2.565689 0.6949242 Indep
## 21 2.737034 0.7987535 Indep
## 6 2.812536 0.7580205 Indep
## 16 3.077702 0.9303100 Indep
## 18 3.192135 0.7561982 Indep
## 19 3.204291 0.7683349 Indep
## parameter SE missModel
## 6 -9.066671 NA Indep
## 35 -3.983056 1.482096 Indep
## 25 -3.688016 1.425409 Indep
## 10 -2.518948 1.491961 Indep
## 24 -2.343804 2.379993 Indep
## 28 -2.265829 2.084995 Indep
## parameter SE missModel
## 13 -10.600876 9.199102 Indep
## 12 -7.608177 5.976598 Indep
## 4 -6.967102 4.550087 Indep
## 8 -5.107308 3.390178 Indep
## 32 -3.863878 3.145042 Indep
## 16 -3.732614 2.720044 Indep
## parameter SE missModel
## 6 1.157657 NA Indep
## 11 1.498300 0.8333988 Indep
## 26 1.608398 0.7615361 Indep
## 17 2.343988 0.8712003 Indep
## 1 2.363417 0.6172139 Indep
## 14 2.388523 0.5638172 Indep
## parameter SE missModel
## 18 -7.915567 NA Indep
## 10 -6.006189 NA Indep
## 4 -5.514273 1.1385876 Indep
## 15 -5.061362 0.9744425 Indep
## 13 -4.919149 1.1736797 Indep
## 8 -4.631797 0.8727901 Indep
## parameter SE missModel
## 4 -1.0541759 0.1900847 Indep
## 13 -0.8025529 0.2567004 Indep
## 19 -0.7912228 0.2738069 Indep
## 15 -0.7703994 0.2574297 Indep
## 2 -0.7660896 0.3085943 Indep
## 11 -0.7373803 0.3518431 Indep
## parameter SE missModel
## 10 0.7233999 NA Indep
## 6 0.8481073 0.6061540 Indep
## 1 1.6349917 0.7578088 Indep
## 12 1.8921170 0.7662248 Indep
## 20 1.9075172 0.8669149 Indep
## 11 1.9556487 0.6164461 Indep
## parameter SE missModel
## 4 -8.649119 2.080996 Indep
## 26 -8.272475 2.215487 Indep
## 11 -7.824375 1.694336 Indep
## 47 -7.136581 2.073979 Indep
## 32 -6.928534 1.651045 Indep
## 33 -6.845862 1.545135 Indep
## parameter SE missModel
## 44 -2.062308 0.9548628 Indep
## 41 -1.750173 1.2046102 Indep
## 10 -1.622288 0.7202489 Indep
## 14 -1.518258 0.6462700 Indep
## 28 -1.463635 0.9698189 Indep
## 1 -1.456279 0.4954156 Indep
## parameter SE missModel
## 47 0.6587452 0.4476198 Indep
## 26 0.6732766 0.4701941 Indep
## 11 0.8695871 0.5348307 Indep
## 4 0.9417435 0.4040563 Indep
## 9 1.0394754 0.4892135 Indep
## 20 1.0694637 0.5600053 Indep
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Todd"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 1
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1, 2, 4, 5, 6)
# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net",
"20231005_missNetReest_net",
"20231005_missNetReest_net",
"20231005_missNetReest_net",
"20231012_missNetReest_net")
chunkMnarNames = c("20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
# make a temp rbias/rSe data structure to bind with the data frame
tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
# bind them
relBiasData = rbind(relBiasData, tempRelBias)
relSeData = rbind(relSeData, tempRelSe)
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 8 -2.601463 1.256855 Indep
## 24 -2.413046 1.108934 Indep
## 10 -2.395926 1.374799 Indep
## 15 -2.383965 1.253623 Indep
## 9 -2.176266 1.247728 Indep
## 38 -2.170672 1.111399 Indep
## parameter SE missModel
## 5 -2.209327 0.5979698 Indep
## 23 -2.189741 0.6877216 Indep
## 30 -2.066400 0.5400037 Indep
## 4 -2.026562 0.4440535 Indep
## 13 -1.986430 0.4411822 Indep
## 39 -1.977470 0.4192232 Indep
## parameter SE missModel
## 2 2.409912 0.2867801 Indep
## 25 2.416764 0.2709456 Indep
## 3 2.762928 0.3026043 Indep
## 31 2.874716 0.3174043 Indep
## 26 2.911591 0.3013970 Indep
## 6 2.946111 0.3113748 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 2 -1.6490952 1.703432 Indep
## 9 -1.4773923 1.747493 Indep
## 23 -1.3377862 1.848695 Indep
## 19 -0.9747808 1.645787 Indep
## 12 -0.8558571 2.243823 Indep
## 5 -0.7947030 2.103760 Indep
## parameter SE missModel
## 21 -2.568440 1.1667600 Indep
## 14 -2.378623 0.8922353 Indep
## 36 -2.341166 0.9318989 Indep
## 8 -2.336268 1.1878642 Indep
## 46 -2.330456 0.8169489 Indep
## 48 -2.304476 1.1932852 Indep
## parameter SE missModel
## 5 1.549235 0.3025049 Indep
## 29 1.710355 0.3314923 Indep
## 24 1.878453 0.3544091 Indep
## 44 1.900765 0.3554659 Indep
## 18 2.099150 0.3389286 Indep
## 39 2.122251 0.3851450 Indep
## parameter SE missModel
## 29 -1.6332365 1.153097 Indep
## 48 -1.3210907 1.395869 Indep
## 22 -0.9199276 1.420857 Indep
## 18 -0.7142808 1.432538 Indep
## 12 -0.6703803 1.449203 Indep
## 10 -0.6670701 1.636150 Indep
## parameter SE missModel
## 34 -2.003352 0.6859301 Indep
## 26 -1.947103 0.6863618 Indep
## 44 -1.940004 0.6037775 Indep
## 38 -1.911503 0.5101568 Indep
## 30 -1.903417 0.4739233 Indep
## 6 -1.901365 0.7485541 Indep
## parameter SE missModel
## 48 1.591746 0.3083361 Indep
## 4 1.612123 0.2829673 Indep
## 18 1.694798 0.2803756 Indep
## 39 1.788530 0.2972528 Indep
## 26 1.793877 0.2787256 Indep
## 6 1.799936 0.2751293 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 6 -4.264797 0.3877630 Indep
## 4 -4.192364 0.3555877 Indep
## 11 -4.155292 0.3629730 Indep
## 15 -4.110912 0.3622067 Indep
## 3 -4.093634 0.3679711 Indep
## 14 -4.090357 0.3716855 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 1 -0.1933919 0.2208256 Indep
## 11 -0.1928590 0.2280006 Indep
## 7 -0.1823017 0.2226638 Indep
## 14 -0.1312494 0.2360561 Indep
## 15 -0.1227517 0.2318603 Indep
## 13 -0.1205444 0.2556446 Indep
## parameter SE missModel
## 6 0.895099 0.2581558 Indep
## 9 1.169641 0.3098932 Indep
## 8 1.205350 0.3021720 Indep
## 5 1.214879 0.3145655 Indep
## 10 1.268217 0.3100610 Indep
## 4 1.294713 0.3410631 Indep
## parameter SE missModel
## 9 -7.562327 1.097650 Indep
## 5 -7.320652 1.176731 Indep
## 20 -7.263994 1.127838 Indep
## 33 -7.255869 1.161143 Indep
## 2 -7.248503 1.104335 Indep
## 50 -7.185518 1.135164 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 49 -0.5511893 0.2380867 Indep
## 28 -0.5461336 0.2205140 Indep
## 18 -0.5001765 0.2197595 Indep
## 6 -0.4852246 0.2300400 Indep
## 43 -0.4411331 0.2405094 Indep
## 41 -0.4401505 0.2326350 Indep
## parameter SE missModel
## 10 0.7147952 0.1443590 Indep
## 20 0.7862908 0.1544625 Indep
## 5 0.8066656 0.1520125 Indep
## 9 0.8667842 0.1670487 Indep
## 40 0.8687591 0.1599717 Indep
## 11 0.8730449 0.1547718 Indep
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Todd"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5 # specifically for 35% missingness because I regex by taking the first number next to 'prop'.
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1, 2, 4, 5, 6)
# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net",
"20231005_missNetReest_net",
"20231005_missNetReest_net",
"20231012_missNetReest_net",
"20231012_missNetReest_net")
chunkMnarNames = c("20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissPropFloat,
chosenMiss = chosenMissValue) %>%
print()
# make a temp rbias/rSe data structure to bind with the data frame
tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
# bind them
relBiasData = rbind(relBiasData, tempRelBias)
relSeData = rbind(relSeData, tempRelSe)
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 13 -3.337059 0.8859680 Indep
## 2 -2.860776 0.7802048 Indep
## 18 -2.851610 1.0974701 Indep
## 37 -2.726147 0.8829394 Indep
## 9 -2.508242 0.9298530 Indep
## 3 -2.472002 0.8998251 Indep
## parameter SE missModel
## 15 -1.436287 0.4408012 Indep
## 44 -1.234459 0.5375122 Indep
## 4 -1.200908 0.4771346 Indep
## 38 -1.151209 0.4432072 Indep
## 12 -1.119609 0.3651799 Indep
## 33 -1.107068 0.5208508 Indep
## parameter SE missModel
## 13 0.8143432 0.1433313 Indep
## 22 0.9230283 0.1460986 Indep
## 11 0.9431998 0.1522376 Indep
## 1 0.9683054 0.1539687 Indep
## 30 0.9840596 0.1521053 Indep
## 39 1.0125468 0.1555323 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 18 -3.903673 1.139670 Indep
## 21 -2.816481 1.616045 Indep
## 19 -2.459853 1.432304 Indep
## 47 -2.184766 1.735768 Indep
## 4 -1.885313 1.392126 Indep
## 6 -1.847751 1.171954 Indep
## parameter SE missModel
## 50 -2.431769 0.9117054 Indep
## 41 -1.988648 0.9647086 Indep
## 38 -1.952683 0.7673793 Indep
## 25 -1.862815 1.0076413 Indep
## 34 -1.652086 0.6791568 Indep
## 27 -1.521875 0.7766990 Indep
## parameter SE missModel
## 18 0.1781799 0.1574842 Indep
## 21 0.3824131 0.1777333 Indep
## 19 0.4221073 0.1832613 Indep
## 9 0.4592685 0.1906219 Indep
## 47 0.4712505 0.1916285 Indep
## 5 0.5133984 0.1805051 Indep
## parameter SE missModel
## 6 -2.659930 0.8906202 Indep
## 43 -2.484435 0.9949309 Indep
## 42 -2.379924 1.0514690 Indep
## 2 -2.249203 1.0359835 Indep
## 19 -2.083134 1.2432392 Indep
## 35 -1.883939 1.1410617 Indep
## parameter SE missModel
## 29 -1.382605 0.6180631 Indep
## 8 -1.350535 0.4879766 Indep
## 24 -1.324427 0.4870290 Indep
## 13 -1.320548 0.5947146 Indep
## 20 -1.271720 0.5316526 Indep
## 11 -1.204599 0.5222904 Indep
## parameter SE missModel
## 1 0.4634877 0.1972899 Indep
## 38 0.5640863 0.1936564 Indep
## 19 0.5753617 0.2009085 Indep
## 31 0.5896167 0.2008107 Indep
## 33 0.6195691 0.1980860 Indep
## 50 0.6667230 0.2139163 Indep
## parameter SE missModel
## 2 -4.417349 0.4255926 Indep
## 1 -4.366253 0.3996069 Indep
## 3 -4.016838 0.3891978 Indep
## 6 -4.546791 0.4310100 mcarERGM
## 5 -4.453206 0.4022960 mcarERGM
## 7 -4.263979 0.3547582 mcarERGM
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 3 -0.02212587 0.2536399 Indep
## 1 0.30771625 0.2589552 Indep
## 2 0.39936734 0.2535769 Indep
## 4 -0.14067356 0.2359803 mcarERGM
## 7 -0.04922777 0.2510923 mcarERGM
## 5 0.43782339 0.2521844 mcarERGM
## parameter SE missModel
## 2 0.6856637 0.2854993 Indep
## 1 0.8319856 0.3128379 Indep
## 3 1.2445683 0.3393180 Indep
## 6 0.4876890 0.2788660 mcarERGM
## 5 0.6333639 0.2932883 mcarERGM
## 7 1.4323621 0.3900154 mcarERGM
## parameter SE missModel
## 9 -9.680129 1.321978 Indep
## 24 -9.409439 1.341914 Indep
## 19 -9.138619 1.273927 Indep
## 22 -8.715590 1.274077 Indep
## 5 -8.703109 1.269927 Indep
## 14 -8.598590 1.232700 Indep
## parameter SE missModel
## 34 -0.4271915 0.2338981 Indep
## 35 -0.2108234 0.2405158 Indep
## 29 -0.1825741 0.2283651 Indep
## 39 -0.1809863 0.2414358 Indep
## 12 -0.1535835 0.2152647 Indep
## 49 -0.0967697 0.2282145 Indep
## parameter SE missModel
## 24 0.2549825 0.1382805 Indep
## 43 0.2645356 0.1409349 Indep
## 3 0.3558289 0.1495326 Indep
## 10 0.3681067 0.1433609 Indep
## 41 0.3699661 0.1499506 Indep
## 19 0.3730467 0.1402606 Indep
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Todd"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 6
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1, 2, 4, 5, 6)
# TODO: vector of file names for regex
chunkRdataNames = c("20231006_missNetReest_net",
"20231006_missNetReest_net",
"20231006_missNetReest_net",
"20231011_missNetReest_net",
"20231011_missNetReest_net")
chunkMnarNames = c("20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
# make a temp rbias/rSe data structure to bind with the data frame
tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
# bind them
relBiasData = rbind(relBiasData, tempRelBias)
relSeData = rbind(relSeData, tempRelSe)
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 31 -4.460967 0.5810290 Indep
## 17 -4.391896 0.6414831 Indep
## 21 -4.251427 0.7467956 Indep
## 34 -4.235010 0.7519899 Indep
## 18 -3.938098 0.7389818 Indep
## 1 -3.887855 0.7437958 Indep
## parameter SE missModel
## 20 -0.54170566 0.3171848 Indep
## 24 -0.31403746 0.3400528 Indep
## 7 -0.24488276 0.3682580 Indep
## 19 -0.22044346 0.2847240 Indep
## 3 -0.07633112 0.2590887 Indep
## 29 -0.05303975 0.2633457 Indep
## parameter SE missModel
## 15 0.1147738 0.1344550 Indep
## 2 0.2438565 0.1288911 Indep
## 17 0.2489000 0.1466968 Indep
## 12 0.2720479 0.1266465 Indep
## 28 0.2784234 0.1299738 Indep
## 23 0.2830477 0.1217562 Indep
## parameter SE missModel
## 34 -4.117064 0.7813483 Indep
## 8 -3.866036 0.8600851 Indep
## 22 -3.575605 0.8914025 Indep
## 2 -3.486708 1.0883342 Indep
## 41 -3.341053 1.1678912 Indep
## 35 -3.238912 0.8570279 Indep
## parameter SE missModel
## 11 -1.6951627 0.7913244 Indep
## 45 -1.0758965 0.4881540 Indep
## 16 -0.6649367 0.4877096 Indep
## 24 -0.5145593 0.5383474 Indep
## 5 -0.5060344 0.3733735 Indep
## 26 -0.3739797 0.5027186 Indep
## parameter SE missModel
## 28 0.02784400 0.1756171 Indep
## 42 0.03321241 0.2095743 Indep
## 2 0.07508869 0.1726614 Indep
## 25 0.11500553 0.1873731 Indep
## 34 0.12822292 0.1818427 Indep
## 1 0.13671639 0.1920777 Indep
## parameter SE missModel
## 21 -4.186214 0.8410843 Indep
## 15 -3.781792 0.7574983 Indep
## 39 -3.415426 0.7451268 Indep
## 30 -3.321968 0.8540362 Indep
## 40 -3.248794 0.9085335 Indep
## 14 -3.190952 0.7852334 Indep
## parameter SE missModel
## 31 -0.9245958 0.4509537 Indep
## 19 -0.7191358 0.4810619 Indep
## 17 -0.4522326 0.4244435 Indep
## 36 -0.4507429 0.4204314 Indep
## 9 -0.4244602 0.3580949 Indep
## 8 -0.3759830 0.3769310 Indep
## parameter SE missModel
## 21 -0.32176800 0.2684163 Indep
## 15 0.01944446 0.2657635 Indep
## 42 0.07392221 0.2709689 Indep
## 6 0.13021100 0.2911042 Indep
## 41 0.19774587 0.2389817 Indep
## 40 0.21448691 0.2095101 Indep
## parameter SE missModel
## 1 -4.749368 0.4281441 Indep
## 3 -4.298512 0.5375795 Indep
## 2 -4.171256 0.5552278 Indep
## 8 -17812.803613 10.6302992 mcarERGM
## 5 -4.781183 0.5044661 mcarERGM
## 4 -4.535313 0.5246310 mcarERGM
## parameter SE missModel
## 2 0.1902914 0.3616075 Indep
## 3 0.4397348 0.2924576 Indep
## 1 0.6479313 0.2579289 Indep
## 7 0.1221795 0.3665846 mcarERGM
## 6 0.3604831 0.3471392 mcarERGM
## 4 0.6110184 0.2724064 mcarERGM
## parameter SE missModel
## 1 0.3854443 3.015044e-01 Indep
## 3 0.4696144 3.230628e-01 Indep
## 2 0.8850460 4.268974e-01 Indep
## 8 -22.0702887 2.680862e+115 mcarERGM
## 5 0.1396263 3.000309e-01 mcarERGM
## 4 0.2854597 2.927772e-01 mcarERGM
## parameter SE missModel
## 4 -11.67967 1.761618 Indep
## 49 -11.60233 1.668491 Indep
## 19 -10.86087 1.509740 Indep
## 33 -10.81512 1.729544 Indep
## 34 -10.27515 1.710123 Indep
## 26 -10.15169 1.589956 Indep
## parameter SE missModel
## 41 -0.29712466 0.2683588 Indep
## 33 -0.20487467 0.2645884 Indep
## 24 -0.18751920 0.2627259 Indep
## 12 -0.14089183 0.2527441 Indep
## 2 0.01010340 0.2545368 Indep
## 5 0.02821066 0.2418109 Indep
## parameter SE missModel
## 50 -0.164454094 0.2209245 Indep
## 25 -0.131737953 0.1781773 Indep
## 18 -0.093227217 0.2367062 Indep
## 29 -0.082722105 0.2330193 Indep
## 42 -0.040174880 0.2351467 Indep
## 31 0.006732098 0.1901246 Indep
The plots below are specifically plots with very sparse amounts of successful re-estimations.
# 10% missingness - Peter
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 1
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1)
# note:
# net 3 is so messed up it has ONE converged estimation for this set
# 20231010_missNetReest_net3_missModel1_prop1_trial9.RData
# handle fail rates separately. these data structures are very unstandardised.
# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net")
chunkMnarNames = c("20231011_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 1 0.06730704 2.831189 mnarErgm
## 6 0.41110006 2.700997 mnarErgm
## 10 0.44803684 3.233691 mnarErgm
## 7 0.62303532 3.283356 mnarErgm
## 8 1.28406049 3.326750 mnarErgm
## 9 1.40853051 3.443864 mnarErgm
## parameter SE missModel
## 5 -3.012581 1.1944335 mnarErgm
## 3 -2.811731 1.0764746 mnarErgm
## 11 -2.705572 0.9157626 mnarErgm
## 2 -2.677974 0.9916606 mnarErgm
## 4 -2.656809 0.9220835 mnarErgm
## 9 -2.590910 0.8378803 mnarErgm
## parameter SE missModel
## 5 3.275134 0.3421808 mnarErgm
## 9 3.344275 0.3413299 mnarErgm
## 2 3.398827 0.3541200 mnarErgm
## 8 3.406120 0.3646437 mnarErgm
## 3 3.407984 0.3722419 mnarErgm
## 4 3.423570 0.3405085 mnarErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
# the 'converged' model for net 3
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = 3
# file list for the chosen network index
chosenNetOutFiles = grep(paste("20231010_missNetReest_net", chosenNetInd, sep =""), outputList)
# loop to load
chosenNetOutList = outputList[chosenNetOutFiles]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
}
## parameter SE missModel
## 1 -89.26956 NA Indep
## parameter SE missModel
## 1 21.2504 NA Indep
## parameter SE missModel
## 1 2.785266 NA Indep
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
# 35% missingness
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5 # specifically for 35% missingness because I regex by taking the first number next to 'prop'.
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1)
# note:
# net 3 is still so messed up it has three converged estimation for this set (out of 200)
# 20231010_missNetReest_net3_missModel2_prop3.5_trial11.RData
# 20231010_missNetReest_net3_missModel3_prop3.5_trial12.RData
# 20231010_missNetReest_net3_missModel3_prop3.5_trial19.RData
# no mnars, so the loop won't work.
# handle fail rates separately. these data structures are very unstandardised.
# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net")
chunkMnarNames = c("20231011_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissPropFloat,
chosenMiss = chosenMissValue) %>%
print()
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 3 1.354497 2.909963 mnarErgm
## 18 2.906806 2.973310 mnarErgm
## 13 2.981297 3.469670 mnarErgm
## 9 3.067972 3.937823 mnarErgm
## 2 3.332524 3.476768 mnarErgm
## 4 3.417883 3.512137 mnarErgm
## parameter SE missModel
## 10 -4.981270 1.855869 mnarErgm
## 20 -4.887841 1.760132 mnarErgm
## 6 -4.644249 1.794332 mnarErgm
## 8 -4.517936 1.729852 mnarErgm
## 16 -4.301038 1.535839 mnarErgm
## 11 -4.210703 1.294873 mnarErgm
## parameter SE missModel
## 12 1.962097 0.2659402 mnarErgm
## 16 2.015492 0.2764007 mnarErgm
## 11 2.038997 0.2944418 mnarErgm
## 19 2.079809 0.3464684 mnarErgm
## 7 2.083504 0.3216842 mnarErgm
## 6 2.096097 0.2653546 mnarErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
### A section specifically for the three re-estimations of net3 that made it
# no MNAR files, so removing them from the code below
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = 3
# file list for the chosen network index
chosenNetOutFiles = grep(paste("20231010_missNetReest_net", chosenNetInd, sep =""), outputList)
# loop to load
chosenNetOutList = outputList[chosenNetOutFiles]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissPropFloat,
chosenMiss = chosenMissValue) %>%
print()
}
## parameter SE missModel
## 1 -59.62898 NA mcarERGM
## 2 -31.73605 NA Latent
## 3 -17.05925 NA Latent
## parameter SE missModel
## 1 15.661456 NA mcarERGM
## 2 1.350935 NA Latent
## 3 5.424564 NA Latent
## parameter SE missModel
## 1 -0.2770482 NA mcarERGM
## 3 -1.1609499 NA Latent
## 2 12.4001035 NA Latent
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
# 60% Peter
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 6
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1)
# note:
# net 3 is still so messed up it has two converged estimation for this set (out of 200)
# 20231011_missNetReest_net3_missModel2_prop6_trial40
# 20231011_missNetReest_net3_missModel3_prop6_trial46
# no mnars, so the loop won't work.
# handle fail rates separately. these data structures are very unstandardised.
# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net")
chunkMnarNames = c("20231012_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 1 -19.471644 NA Indep
## 2 -5.164890 1.341253 Indep
## 3 -12.764592 NA mcarERGM
## 12 -4.443987 1.069731 mnarErgm
## 8 -4.212305 1.364534 mnarErgm
## 26 -4.030651 1.201752 mnarErgm
## parameter SE missModel
## 2 -1.676416 0.2491544 Indep
## 1 3.753020 NA Indep
## 3 2.523763 NA mcarERGM
## 24 -16.309377 12.3213518 mnarErgm
## 31 -7.352189 4.5866855 mnarErgm
## 13 -6.388176 3.7709916 mnarErgm
## parameter SE missModel
## 1 2.895285 NA Indep
## 2 4.855019 0.6582781 Indep
## 3 1.836877 NA mcarERGM
## 20 1.610596 0.6224760 mnarErgm
## 30 1.847450 0.3734943 mnarErgm
## 24 1.860921 0.2396976 mnarErgm
### A section specifically for the three re-estimations of net3 that made it
# no MNAR files, so removing them from the code below
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = 3
# file list for the chosen network index
chosenNetOutFiles = grep(paste("20231011_missNetReest_net", chosenNetInd, sep =""), outputList)
# loop to load
chosenNetOutList = outputList[chosenNetOutFiles]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
}
## parameter SE missModel
## 1 -5.022512 NA mcarERGM
## 2 -62.259251 NA Latent
## parameter SE missModel
## 1 2.548622 NA mcarERGM
## 2 17.492617 NA Latent
## parameter SE missModel
## 1 -1.027786 NA mcarERGM
## 2 -2.287233 NA Latent
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
Literally nothing. No successful re-estimations for any missingness model for net 3.
# 35% missingness - Todd
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "TOdd"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = 3
# handle fail rates separately. these data structures are very unstandardised.
# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net")
chunkMnarNames = c("20231012_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissPropFloat,
chosenMiss = chosenMissValue) %>%
print()
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 1 -2.2089033 1.681170 Indep
## 2 -2.7527563 1.471588 Latent
## 3 0.9039891 1.649547 mnarErgm
## parameter SE missModel
## 1 -1.112195 0.5174151 Indep
## 2 -1.248764 0.4822099 Latent
## 3 -1.302105 0.4873029 mnarErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 1 2.437951 0.4559972 Indep
## 2 2.961205 0.5207720 Latent
## 3 1.119011 0.2363328 mnarErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
# 35% missingness - Todd
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "TOdd"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 6
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = 3
# note:
# net 3 is so messed up it has ONE converged estimation for this set
# 20231010_missNetReest_net3_missModel1_prop1_trial9.RData
# handle fail rates separately. these data structures are very unstandardised.
# TODO: vector of file names for regex
chunkRdataNames = c("20231006_missNetReest_net")
chunkMnarNames = c("20231012_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 1 -3.962095 0.9023328 mcarERGM
## 2 -1.183762 1.5346827 Latent
## 3 -14.485997 NA mnarErgm
## parameter SE missModel
## 1 0.6043238 0.3158800 mcarERGM
## 2 -0.3741873 0.4741993 Latent
## 3 -1.6860124 NA mnarErgm
## parameter SE missModel
## 1 0.1417893 0.1547958 mcarERGM
## 2 0.5251103 0.1820051 Latent
## 3 9.7456273 NA mnarErgm
# plotting the failure rate
# data frame for the completely failed missingness combinations
# just for the formatting
tempFailData = failRateData[1,]
# todd 10s
tempFailData[1, ] = c(1, 3, 1, 1, "0")
tempFailData[2, ] = c(1, 3, 2, 1, "0")
tempFailData[3, ] = c(1, 3, 3, 1, "0")
tempFailData[4, ] = c(1, 3, 4, 1, "0")
# peter 10s
tempFailData[5, ] = c(1, 1, 1, 1, "NA")
tempFailData[6, ] = c(1, 1, 2, 1, "NA")
tempFailData[7, ] = c(1, 1, 3, 1, "NA")
tempFailData[8, ] = c(1, 3, 2, 1, "NA")
tempFailData[9, ] = c(1, 3, 3, 1, "NA")
tempFailData[10, ] = c(1, 3, 4, 1, "NA")
# todd 35s
tempFailData[11, ] = c(1, 3, 2, 3, "0")
# peter 35s
tempFailData[12, ] = c(1, 1, 1, 3, "NA")
tempFailData[13, ] = c(1, 1, 2, 3, "NA")
tempFailData[14, ] = c(1, 1, 3, 3, "NA")
tempFailData[15, ] = c(1, 3, 1, 3, "NA")
tempFailData[16, ] = c(1, 3, 4, 3, "NA")
# todd 60
tempFailData[17, ] = c(1, 3, 1, 6, "0")
# peter 60
tempFailData[18, ] = c(1, 1, 3, 6, "NA")
tempFailData[19, ] = c(1, 3, 1, 6, "NA")
tempFailData[20, ] = c(1, 3, 4, 6, "NA")
# object type stuff
tempFailData$failRate = as.numeric(tempFailData$failRate)
tempFailData$netInd = as.numeric(tempFailData$netInd)
tempFailData$missModel = as.numeric(tempFailData$missModel)
tempFailData$missProp = as.numeric(tempFailData$missProp)
# and bind
failRateData = rbind(failRateData, tempFailData)
# check for duplicates
if(any(duplicated(failRateData))){
# take only distinct elements
failRateData = distinct(failRateData)
}
# give the fail rate data (keep it untouched) a unique variable for the combination of missingness
failRatePlotData = failRateData %>%
group_by(missProp, missSave) %>%
mutate(missCondition = cur_group_id()) %>%
ungroup()
# turn variables into factors for plotting
failRatePlotData$missModel = factor(failRatePlotData$missModel, levels = c(1,2,3,4), labels = c("Indep", "mcarERGM", "Latent", "mnarErgm"))
failRatePlotData$missCondition = factor(failRatePlotData$missCondition, levels = c(2,4,6,1,3,5), labels = c("Miss10", "Miss35", "Miss60", "Zero10", "Zero35", "Zero60"))
# subsetting per network because between-network comparisons aren't too informative
# within network comparisons where I have a specific group index for every combination of missSave and missProp (6 categories total)
selectedNets = c(1,2,3,4,5,6)
for(chosenNetInd in selectedNets){
# start with a subset of net 2
failRateSubset = failRatePlotData %>%
filter(netInd == chosenNetInd)
# and now we have plot-ready data
print(ggplot(failRateSubset, aes(fill = missModel, y = failRate, x = missCondition)) +
geom_bar(position="dodge", stat="identity") +
xlab("Missingness condition") +
ylab("Failure rate") +
ggtitle(paste("Failure rate - Net", chosenNetInd)) +
theme_classic() +
theme(axis.ticks.x = element_blank()) +
ylim(0,1) )
}
Relative bias was defined as:
\[rBias = \frac{\widetilde{\theta} - \theta}{\theta},\]
with \(\widetilde{\theta}\) representing the re-estimated parameter value and \(\theta\) representing the true parameter value.
The relative standard error is defined as: \[rSe = \frac{SE(\widetilde{\theta})}{SE(\theta)}.\]
# change some variable types
relBiasData$netInd = as.factor(relBiasData$netInd)
relSeData$netInd = as.factor(relSeData$netInd)
# rename Peter and Todd to Miss and Zero respectively
library(stringr)
relBiasData$missSave = str_replace(string = relBiasData$missSave,
pattern = "Peter",
replacement = "Miss")
relBiasData$missSave = str_replace(string = relBiasData$missSave,
pattern = "Todd",
replacement = "Zero")
# and for standard errors
relSeData$missSave = str_replace(string = relSeData$missSave,
pattern = "Peter",
replacement = "Miss")
relSeData$missSave = str_replace(string = relSeData$missSave,
pattern = "Todd",
replacement = "Zero")
missPropChoices = c(1, 3, 6)
paraChoices = c("edges", "altStar", "gwesp")
# loop it
for(chosenProp in missPropChoices){
for(chosenPara in paraChoices){
# I don't even know what kind of plot this is...
# but I need to do some wrangling and subsetting.
# need to subset some stuff
relBiasSub = relBiasData %>%
filter(propMiss == chosenProp & paraName == chosenPara)
# use the 2.5% and 97.5% quantiles for the axis limits because some of them have some VERY extreme values
tempBiasBounds = c(quantile(relBiasSub$rBias, probs = c(0.025, 0.975)))
# so the plot would be something like
relBiasPlot = ggplot(relBiasSub, aes(x = netInd, y = rBias, fill = missModel)) +
geom_boxplot(position = position_dodge()) +
facet_wrap(~missSave) +
theme_classic() +
geom_hline(yintercept = 0, col = "black", lty = 2) +
ylim((tempBiasBounds + c(-0.5, 0.5))) + # and add a bit of wiggle room
xlab("Network no.") +
ylab("Relative bias") +
ggtitle(paste("Proportion ", ifelse(chosenProp == 3, yes = 3.5*10, no = chosenProp*10), "% - ", chosenPara, sep = "")) # the ifelse is because of how I've regexed 35% missingness
# print plot
print(relBiasPlot)
# and another one for variance (technically relative standard error... but yeah)
relSeSub = relSeData %>%
filter(propMiss == chosenProp & paraName == chosenPara)
# just the 97.5% quantile because this is strictly positive
tempVarUpper = quantile(relSeSub$rSe, probs = c(0.975), na.rm = TRUE)
# so the plot would be something like
relSePlot = ggplot(relSeSub, aes(x = netInd, y = rSe, fill = missModel)) +
geom_boxplot(position = position_dodge()) +
facet_wrap(~missSave) +
theme_classic() +
geom_hline(yintercept = 1, col = "black", lty = 2) +
ylim(0, tempVarUpper + 0.5) + # some wiggle room
xlab("Network no.") +
ylab("Relative standard error") +
ggtitle(paste("Proportion ", ifelse(chosenProp == 3, yes = 3.5*10, no = chosenProp*10), "% - ", chosenPara, sep = "")) # the ifelse is because of how I've regexed 35% missingness
# print plot
print(relSePlot)
}
}
## Warning: Removed 40 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 36 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 5 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 47 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 25 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 51 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 26 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 19 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 38 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 44 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 35 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 50 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 45 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 23 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 76 rows containing non-finite values (`stat_boxplot()`).
# a repeat without the imposed axes just for comparison
for(chosenProp in missPropChoices){
for(chosenPara in paraChoices){
# I don't even know what kind of plot this is...
# but I need to do some wrangling and subsetting.
# need to subset some stuff
relBiasSub = relBiasData %>%
filter(propMiss == chosenProp & paraName == chosenPara)
# so the plot would be something like
relBiasPlot = ggplot(relBiasSub, aes(x = netInd, y = rBias, fill = missModel)) +
geom_boxplot(position = position_dodge()) +
facet_wrap(~missSave) +
theme_classic() +
geom_hline(yintercept = 0, col = "black", lty = 2) +
xlab("Network no.") +
ylab("Relative bias") +
ggtitle(paste("Proportion ", ifelse(chosenProp == 3, yes = 3.5*10, no = chosenProp*10), "% - ", chosenPara, sep = "")) # the ifelse is because of how I've regexed 35% missingness
# print plot
print(relBiasPlot)
# and another one for variance (technically relative standard error... but yeah)
relSeSub = relSeData %>%
filter(propMiss == chosenProp & paraName == chosenPara)
# so the plot would be something like
relSePlot = ggplot(relSeSub, aes(x = netInd, y = rSe, fill = missModel)) +
geom_boxplot(position = position_dodge()) +
facet_wrap(~missSave) +
theme_classic() +
geom_hline(yintercept = 1, col = "black", lty = 2) +
xlab("Network no.") +
ylab("Relative standard error") +
ggtitle(paste("Proportion ", ifelse(chosenProp == 3, yes = 3.5*10, no = chosenProp*10), "% - ", chosenPara, sep = "")) # the ifelse is because of how I've regexed 35% missingness
# print plot
print(relSePlot)
}
}
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 6 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 9 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 17 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 49 rows containing non-finite values (`stat_boxplot()`).
As we see in the MNAR ERGM, the parameter estimates are biased in an intuitve fashion when we consider the value of the coefficient for the entrainment parameter.
This section (and estimates) are really to look at how differences in the entrainment parameter can affect parameter estimates and potentially various metrics.
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5 # specifically for 35% missingness because I regex by taking the first number next to 'prop'.
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
# this section only has one network, but I need multiple coefficient sets.
chunkNetworks = 4
chunkCoefSets = c(7, 8, 9, 10, 11, 12, 13, 14, 15)
# TODO: vector of file names for regex
chunkMnarNames = c("20240227_missErgmNetReest_Miss",
"20240306_missErgmNetReest_Miss")
# ... add a name indicator to choose what the file name is
nameInd = c(1, 1, 1, 1, 1, 2, 2, 2, 2)
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
coefSetList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
## regex the MNAR files
# combine the two indices, netOut isnt used because only MNAR ergms are used here (technically not 9 because 0, but yeah.)
combinedNetOutList = c(grep(paste(chunkMnarNames[1], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList),
grep(paste(chunkMnarNames[2], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList))
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# always 'MNAR'. Don't think this'll be used but yeah
missModelList[[fileInd]] = 4
# and always 35% missingness
propMissList[[fileInd]] = 3
# I do however need the coefset list
coefSetList[[fileInd]] = as.integer(sub(".*?_coefSet.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# we now have all the objects we need
# let's check how many of each coefset made it
table(unlist(coefSetList))
##
## 7 8 9 10 11 12 13 14 15
## 19 34 42 43 44 27 34 38 41
# and now figure out how we're plotting this
# first we need to know which coefsets refer to what value
entrValue = c(-1, -0.5, 0, 0.5, 1, -0.75, -0.25, 0.25, 0.75)
# initialise the final plot data
entrPlotData = tibble()
# next we want to take out the re-estimated coefficients
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# including edges
# I'm bastardising the prepMissPlot function to include the coef set index
## Check to make sure all the lists are of the same length
listLengths = lapply(list(modelResList, modelSeList, missModelList, propMissList, coefSetList), FUN = length)
## A unit test that spits out an error if there's more than one unique length in the list of lengths
if( length(unique(listLengths)) != 1 ){
stop("Lists have differing lengths")
}
# subset the lists to only be a single chosen proportion
chosenPropIndex = propMissList == 3
modelResChosenPropList = modelResList[chosenPropIndex]
modelSeChosenPropList = modelSeList[chosenPropIndex]
missModelChosenPropList = missModelList[chosenPropIndex]
# Specify indices for the chosen parameter
# doing it this way because parameter names get very lengthy
paraIndex = c(1:3)
modelParaNames = c("edges", "altStar", "gwesp")
chosenParaIndex = paraIndex[modelParaNames == chosenParaName]
# making the dataset
plotData = data.frame(parameter = c(unlist(lapply(modelResChosenPropList, `[[`, chosenParaIndex) )),
SE = c(unlist(lapply(modelSeChosenPropList, `[[`, chosenParaIndex))),
missModel = factor(c(unlist(missModelChosenPropList)), levels = c(1,2,3,4), labels = c("Indep", "mcarERGM", "Latent", "mnarErgm")),
coefSet = unlist(coefSetList))
# brief inspection just to make sure nothing's wrong
head(plotData) %>%
print()
# now that we have the 'raw' plot data for a given parameter, let's transform it to a mean and 95% CI to plot
# basically for each coefset, I want a mean and 95% CI
tempEntrPlotData = plotData %>%
group_by(coefSet) %>%
summarize(Mean = mean(parameter),
Lower = quantile(parameter, probs = 0.025),
Upper = quantile(parameter, probs = 0.975)) %>%
add_column(Parameter = chosenParaName)
# and bind the temp datasets
entrPlotData = bind_rows(entrPlotData, tempEntrPlotData)
}
## parameter SE missModel coefSet
## 1 -0.66292277 1.776960 mnarErgm 10
## 2 3.53737524 5.521055 mnarErgm 10
## 3 -0.01855624 2.498809 mnarErgm 10
## 4 0.64028293 2.548163 mnarErgm 10
## 5 -0.22500862 1.847154 mnarErgm 10
## 6 0.63681549 2.359213 mnarErgm 10
## parameter SE missModel coefSet
## 1 -1.420789 0.4377770 mnarErgm 10
## 2 -2.765767 1.3530311 mnarErgm 10
## 3 -1.875441 0.6112890 mnarErgm 10
## 4 -2.055407 0.5953179 mnarErgm 10
## 5 -1.802903 0.4268378 mnarErgm 10
## 6 -1.898855 0.5820178 mnarErgm 10
## parameter SE missModel coefSet
## 1 2.153231 0.4786744 mnarErgm 10
## 2 2.658408 0.5277056 mnarErgm 10
## 3 2.712541 0.5185533 mnarErgm 10
## 4 2.731668 0.4606359 mnarErgm 10
## 5 2.665940 0.5015408 mnarErgm 10
## 6 2.419708 0.5298171 mnarErgm 10
# basically want to recode the coefsets into meaningful-ish values.. so:
entrPlotData = entrPlotData %>%
mutate(entrValue = recode(coefSet, `7` = -1, `8` = -0.5, `9` = 0, `10` = 0.5, `11` = 1, `12` = -0.75, `13` = -0.25, `14` = 0.25, `15` = 0.75))
# and plot it
entrPlotData %>%
filter(Parameter == "edges") %>%
ggplot(aes(x = entrValue, y = Mean)) +
geom_line() +
geom_hline(yintercept = -0.77, col = "darkblue") +
geom_ribbon(aes(ymin = Lower, ymax = Upper), alpha = 0.2) +
labs( x = "Entrainment parameter value", y = "Mean (95% CI) edges estimate" ) +
ggtitle("Edges") +
theme_classic()
entrPlotData %>%
filter(Parameter == "altStar") %>%
ggplot(aes(x = entrValue, y = Mean)) +
geom_line() +
geom_hline(yintercept = -1.88, col = "darkblue") +
geom_ribbon(aes(ymin = Lower, ymax = Upper), alpha = 0.2)+
labs( x = "Entrainment parameter value", y = "Mean (95% CI) altStar estimate")+
ggtitle("AltStar") +
theme_classic()
entrPlotData %>%
filter(Parameter == "gwesp") %>%
ggplot(aes(x = entrValue, y = Mean)) +
geom_line() +
geom_hline(yintercept = 3.10, col = "darkblue") +
geom_ribbon(aes(ymin = Lower, ymax = Upper), alpha = 0.2)+
labs( x = "Entrainment parameter value", y = "Mean (95% CI) gwesp estimate")+
ggtitle("Gwesp") +
theme_classic()